#!/usr/bin/perl -w
# Bug reports and comments to Igor.Ermolaev@intel.com
use strict;
no warnings 'portable';
use File::Spec;

use constant { FALSE=>0, TRUE=>-1 };

my @DEFAULT_BASES = ( { base=>0x400000 }, { base=>0x300000000000, step=>0x001000000000 } );

my $loop_rqrd  = TRUE;
my $func_rqrd  = TRUE;
my $csv_rqrd   = FALSE;
my $dump_rqrd  = FALSE;
my $events_flt = 0;
my $dump_sptd  = FALSE;
my $key_event  = 'instrs';

if   ( $#ARGV >  0 && $ARGV[0] eq '-k'    ){ shift @ARGV;$key_event = $ARGV[0];shift @ARGV; }
if   ( $#ARGV > -1 && $ARGV[0] eq '-f'    ){ $loop_rqrd = FALSE;shift @ARGV; }
elsif( $#ARGV > -1 && $ARGV[0] eq '-l'    ){ $func_rqrd = FALSE;shift @ARGV; }
if   ( $#ARGV > -1 && $ARGV[0] eq '-d'    )
{
   $dump_rqrd = TRUE;shift @ARGV;
   if( $#ARGV > -1 && $ARGV[0] =~ /\d+/o  ){ $events_flt = int $ARGV[0];shift @ARGV; }
}
if   ( $#ARGV > -1 && $ARGV[0] eq '--csv' ){ $csv_rqrd  = TRUE ;shift @ARGV; }
if   ( $#ARGV >  0 && $ARGV[0] eq '-b'    )
{
   shift @ARGV;
   for(@ARGV){ if( /([[:xdigit:]]+)\:([[:xdigit:]]+)/o ){ push @DEFAULT_BASES, { base=>hex $1, step=>hex$2 }; } }
   @DEFAULT_BASES = sort{ $a->{base}<=>$b->{base} }@DEFAULT_BASES;
}

our %modules    = ();
our %processes  = ();
my  %bbs        = ();
our %loops      = ();
our @info       = ();
our $idx        = undef;
our $blk_lst    = undef;
our $blk_idx    = undef;
our $loop       = -1;
our $depth      = 0;
our %funcs      = ();
our @funcs_info = ();
my  $process    = undef;

sub begin_loop
{
   my( $process, $module, $func, $id, $address ) = @_;
   $loops{$module}{$id} = { address=>$address } if !defined $loops{$module} || !defined $loops{$module}{$id};
   my $l = $loops{$module}{$id};
   $address == $l->{address} or die $!;
   if( !defined $l->{index} )
   {
      $info[$idx]{inner} = FALSE if defined $idx;
      push @info, { base=>( exists $processes{$process}{$module} ? $processes{$process}{$module} : $processes{''}{$module} ), function=>$funcs{$module}{$func}, blocks=>[], instrs=>0, cycles=>0, events=>0, module=>File::Spec->canonpath( File::Spec->catfile( $modules{$module}{directory}, $modules{$module}{name} ) ) };
      $l->{index} = $#info;
   }
   $idx     = $l->{index};
   $blk_lst = undef;
   $blk_idx = undef;
   $loop    = $depth;
   $depth++;
}

sub add_events_to_func
{
   my( $module, $func, $instrs, $cycles, $events ) = @_;
   my $fidx = undef;
   if( !defined $funcs{$module} || !defined $funcs{$module}{$func} )
   {
      push @funcs_info, { name=>$func, instrs=>0, cycles=>0, events=>0, groups=>{} };
      $fidx = $#funcs_info;
      $funcs{$module}{$func} = $fidx;
   }
   else{ $fidx = $funcs{$module}{$func}; }
   $funcs_info[$fidx]{instrs} += $instrs if defined $instrs;
   $funcs_info[$fidx]{cycles} += $cycles if defined $cycles;
   $funcs_info[$fidx]{events} += $events if defined $events;
   return $fidx;
}

sub add_blk_to_cur_func
{
   my( $addr, $size, $instrs, $cycles, $events, $tid ) = @_;
   if( !defined $info[$idx]{base} )
   {
      my $i;
      for( $i = $#DEFAULT_BASES; $i >= 0; $i-- )
      {
         if( $addr >= $DEFAULT_BASES[$i]{base} )
         {
            if( exists $DEFAULT_BASES[$i]{step} ){ $info[$idx]{base} = $addr & ~($DEFAULT_BASES[$i]{step}-1); }
            else                                 { $info[$idx]{base} = $DEFAULT_BASES[$i]{base};              }
            last;
         }
      }
      $info[$idx]{base} = 0 if $i < 0 || $i == 0 && $info[$idx]{module} =~ /\.so(\.\d+)*$/o; # hack, need module base addr info from amplifier
   }
   push @{$info[$idx]{blocks}}, { address=>( $addr - $info[$idx]{base} ), size=>$size, instrs=>$instrs, cycles=>$cycles, events=>$events, tid=>$tid };
}

my $ctb_idx = 0;
my $ctb_rte = 1;
my $ccl_idx = undef;
my $ccl_rte = undef;
my $cpu_idx = undef;
my $tid_idx = undef;
my $sze_idx = undef;
my $adr_idx = undef;
my $mdl_idx = undef;
my $fnc_idx = undef;
my $cur_adr = undef;
my $mdl_rec = undef;

while( <STDIN> )
{
   if( $dump_sptd == TRUE )
   {
      my     $line = $_;
      my $flt_sptd = FALSE;
      if( $events_flt > 0 )
      {
         if( $line =~ /^#(.*)\:([[:xdigit:]]+)$/o ){ $mdl_rec = $line;next; }
         elsif( $line =~ /^\s*([[:xdigit:]]+)\s+(\d+)(\s+\d+)+$/o )
         {
            for(split' ',$3){ if( int $_ <= $events_flt ){ $flt_sptd = TRUE;last; } }
            if( defined $mdl_rec && $flt_sptd != TRUE ){ print $mdl_rec; $mdl_rec = undef; }
         }
      }
      print $line if $flt_sptd != TRUE;
      next;
   }
   $depth-- while /\<\/\s*(items|children)\>/go;
   if( $depth <= $loop )
   {
      $info[$idx]{inner} = TRUE if defined $idx && @{$info[$idx]{blocks}};
      $idx  = undef;
      $loop = -1;
   }
   if( /\<\s*event\s+id\s*=\s*\"(\d+)\".+displayName\s*=\s*\"(.+)\"\s*rate\s*=\s*\"(\d+)\"/o )
   {
      my $idx = int $1;
      my $enm = $2;
      my $rte = int $3;
      if( $enm =~ /inst.*ret/io )
      {
         $ctb_idx = $idx;
         $ctb_rte = $rte;
      }
      elsif( $enm =~ /cpu.*clk.*unh/io && $enm !~ /ref_tsc/io )
      {
         $ccl_idx = $idx;
         $ccl_rte = $rte;
      }
   }
   elsif( /\<\s*basicBlocks\s+id\s*=\s*\"(\d+)\"\s*moduleFile\s*=\s*"\/\/\@modules\.(\d+)\"\s*address\s*=\s*\"(\d+)\"\s*size\s*=\s*\"(\d+)\"/o )
   {
      $bbs{$2}{$1} = { address=>$3, size=>$4 };
   }
   elsif( /\<\s*functions\s+id\s*=\s*\"(\d+)\"\s*moduleFile\s*=\s*\"\/\/\@modules\.(\d+)\"\s*class\s*=\s*\"[^"]+\"\s*name\s*=\s*\"([^\"]+)\"\s*fullName\s*=\s*\"([^\"]+)\"\s*linkerName\s*=\s*\"([^\"]+)\"/o )
   {
      if( !defined $funcs{$2} || !defined $funcs{$2}{$1} )
      {
         push @funcs_info, { name=>$5, instrs=>0, cycles=>0, events=>0 };
         $funcs{$2}{$1} = $#funcs_info;
      }
   }
   elsif( /\<\s*modules\s+id\s*=\s*\"(\d+)\"\s*name\s*=\s*\"([^\"]+)\"\s*directory\s*=\s*\"([^\"]+)\"/o )
   {
      $modules{$1} = { name=>$2, directory=>$3 };
   }
   elsif( /\<\s*modules\s+id\s*=\s*\"(\d+)\"\s*name\s*=\s*\"([^\"]+)\"\s*directory\s*=\s*\"\"\s*.*locatedPath\s*=\s*\"([^\"]*)\"/o )
   {
      if( $3 ne '' )
      {
         my ( $vol, $dir, $name ) = File::Spec->splitpath( $3 );
         $modules{$1} = { name=>$name, directory=>$dir };
      }
      else
      {
         $modules{$1} = { name=>$2, directory=>'unknown' };
      }
   }
   elsif( /\<\s*moduleInstances\s+loadAddress\s*=\s*\"([-\d]+)\"\s*length\s*=\s*\"\d+\"\s*fileOffset\s*=\s*\"\d+\"\s*moduleFile\s*=\s*\"\/\/\@modules\.(\d+)\"\s*(process\s*=\s*\"\/\/\@processes\.(\d+)\")?/o )
   {
      if( defined $3 ){ $processes{$4}{$2} = $1; }
      else            { $processes{''}{$2} = $1; }
   }
   elsif( /\<\s*events\s+eventDesc\s*=\s*\"\/\/\@events\.(\d+)\"\s*value\s*=\s*\"(\d+|\d+(\.\d+)?e\+\d+)\"/o )
   {
      if( defined $idx )
      {
         my $enm = undef;
         if   ( int $1 == $ctb_idx                     ){ $enm = 'instrs'; }
         elsif( defined $ccl_idx && int $1 == $ccl_idx ){ $enm = 'cycles'; }
         
         my $ii = $blk_lst;
         if( !defined $ii )
         {
            $info[$idx]{events} += int $2;
            $info[$idx]{$enm  } += int $2 if defined $enm;
            $ii = $idx;
         }
         $info[$ii]{blocks}->[$blk_idx]->{events}  += $2 if defined $blk_idx;
         $funcs_info[$info[$ii]{function}]{events} += $2 if exists $info[$ii]{function};
         if( defined $enm )
         {
            $info[$ii]{blocks}->[$blk_idx]->{$enm}  += $2 if defined $blk_idx;
            $funcs_info[$info[$ii]{function}]{$enm} += $2 if exists $info[$ii]{function};
         }
      }
   }
   elsif( /\<\s*items\s+process\s*=\s*\"\/\/\@processes\.(\d+)\"\s*moduleInstance\s*=\s*\"[^\"]+\"\s*module\s*=\s*\"[^\"]+\"\s*function\s*=\s*\"\/\/\@modules\.(\d+)\/\@functions\.(\d+)\"\s*basicBlock\s*=\"\/\/\@modules\.\d+\/\@basicBlocks\.(\d+)\"\s*type\s*=\s*\"BASIC_BLOCK\"\s*(address\s*=\s*\"(\d+)\")?/o )
   {
      if( defined $5 )
      {
         my $bb = $bbs{$2}{$4};
         $6 == $bb->{address} or die $!;
         if( !defined $bb->{index} )
         {
            push @info, { base=>( exists $processes{$1}{$2} ? $processes{$1}{$2} : $processes{''}{$2} ), function=>$funcs{$2}{$3}, blocks=>[ { address=>$6, size=>$bb->{size}, instrs=>0, cycles=>0, events=>0 } ], instrs=>0, cycles=>0, events=>0, module=>File::Spec->canonpath( File::Spec->catfile( $modules{$2}{directory}, $modules{$2}{name} ) ), inner=>TRUE };
            $bb->{index} = $#info;
         }
         $idx     = $bb->{index};
         $blk_lst = undef;
         $blk_idx = 0;
      }
      else{ $idx = undef; }
      $depth++;
   }
   elsif( /\<\s*items\s+process\s*=\s*\"\/\/\@processes\.(\d+)\"\s*moduleInstance\s*=\s*\"[^\"]+\"\s*module\s*=\s*\"[^\"]+\"\s*function\s*=\s*\"\/\/\@modules\.(\d+)\/\@functions\.(\d+)\"\s*loop2\s*=\"\/\/\@modules\.\d+\/\@loop2s\.(\d+)\"\s*(source\s*=\s*\"\/\/\@modules\.\d+\/\@sourceFiles\.(\d+)\"\s*)?type\s*=\s*\"LOOP2\"\s*(address\s*=\s*\"(\d+)\")?/o )
   {
      $process = $1;
      begin_loop $process, $2, $3, $4, $8;
   }
   elsif( /\<\s*children\s+module\s*=\s*\"[^\"]+\"\s*function\s*=\s*\"\/\/\@modules\.(\d+)\/\@functions\.(\d+)\"\s*type\s*=\s*\"LOOP2\"\s*loop2\s*=\s*\"\/\/\@modules\.\d+\/\@loop2s\.(\d+)\"\s*(address\s*=\s*\"(\d+)\")?/o )
   {
      begin_loop $process, $1, $2, $3, $5;
   }
   elsif( /\<\s*children\s+module\s*=\s*\"[^\"]+\"\s*function\s*=\s*\"\/\/\@modules\.(\d+)\/\@functions\.(\d+)\"\s*type\s*=\s*\"BASIC_BLOCK\"\s*basicBlock\s*=\s*\"\/\/\@modules\.\d+\/\@basicBlocks\.(\d+)\"\s*(address\s*=\s*\"(\d+)\")?/o )
   {
      if( defined $5 && defined $idx )
      {
         my $bb = $bbs{$1}{$3};
         $5 == $bb->{address} or die $!;
         my $blocks  = $info[$idx]{blocks};
            $blk_idx = 0;
         ++$blk_idx until $blk_idx > $#{$blocks} || $blocks->[$blk_idx]{address} == $5;
         push @{$blocks}, { address=>$5, size=>$bb->{size}, instrs=>0, cycles=>0, events=>0 } if $blk_idx > $#{$blocks};
         $blk_lst = $idx;
      }
      $depth++;
   }
   elsif( /\<\s*items\s+process\s*=\s*\"\/\/\@processes\.(\d+)\"\s*moduleInstance\s*=\s*\"[^\"]+\"\s*module\s*=\s*\"[^\"]+\"\s*function\s*=\s*\"\/\/\@modules\.(\d+)\/\@functions\.(\d+)\"\s*type\s*=\s*\"FUNCTION\"\s*(address\s*=\s*\"(\d+)\")?/o )
   {
      my $func = $funcs_info[$funcs{$2}{$3}];
      if( !defined $func->{index} )
      {
         push @info, { base=>( exists $processes{$1}{$2} ? $processes{$1}{$2} : $processes{''}{$2} ), function=>$funcs{$2}{$3}, blocks=>[], instrs=>0, cycles=>0, events=>0, module=>File::Spec->canonpath( File::Spec->catfile( $modules{$2}{directory}, $modules{$2}{name} ) ), inner=>TRUE };
         $func->{index} = $#info;
      }
      $idx     = $func->{index};
      $blk_lst = undef;
      $blk_idx = undef;
      $depth++;
   }
   elsif( /^#\d+(\s\d+)?$/o )
   {
      print $_;
      $dump_sptd = TRUE; #more uniq header???
   }
   elsif( /^.*(Basic\s*Block|BB\s*Start|Code\s*Location)\t/io ) #.*BB\s*Size\t/io )
   {
      my @header = split /\t/o, $_;
      for( my $i = 0; $i <= $#header; $i++ )
      {
         next if $header[$i] =~ /:PF\s*$/io;
         if   ( $header[$i] =~ /Basic\s*Block|BB\s*Start|Code\s*Location/io ){ $adr_idx = $i if !defined $adr_idx; }
         elsif( $header[$i] =~ /H\/W\s*Context/io ){ $cpu_idx = $i if !defined $cpu_idx; }
         elsif( $header[$i] =~ /TID/io            ){ $tid_idx = $i if !defined $tid_idx; }
         elsif( $header[$i] =~ /BB\s*Size/io      ){ $sze_idx = $i if !defined $sze_idx; }
         elsif( $header[$i] =~ /Module/io         ){ $mdl_idx = $i if !defined $mdl_idx; }
         elsif( $header[$i] =~ /Function/io       ){ $fnc_idx = $i if !defined $fnc_idx; }
         elsif( $header[$i] =~ /cpu.*clk.*unh|inst.*ret.*cycles/io && $header[$i] !~ /ref_tsc/io )
         {
            if( !defined $ccl_idx )
            {
               $ccl_idx = $i;
               if( $header[$i] =~ /:(\d+)\s*$/io ){ $ccl_rte = int $1; }
               else                               { $ccl_rte = 1;      }
            }
         }
         elsif( $header[$i] =~ /inst.*ret/io      )
         {
            if( $ctb_idx == 0 )
            {
               $ctb_idx = $i;
               if( $header[$i] =~ /:(\d+)\s*$/io ){ $ctb_rte = int $1; }
               else                               { $ctb_rte = 1;      }
            }
         }
      }
   }
   #elsif( !defined $cpu_idx && !defined $tid_idx && /^\s*\[Outside\s+any\s+(known\s+module|loop)|Unknown\]/io )
   elsif( /^\s*\[Outside\s+any\s+(known\s+module|loop)|Unknown\]/io )
   {
      $cur_adr = undef;
      $idx     = undef;
   }
   elsif( defined $cpu_idx || defined $tid_idx ) # move down when more general output be generated
   {
      my @desc = split /\t/o;
      if( $desc[$adr_idx] ne '' ){ $cur_adr = $desc[$adr_idx];$idx = undef; }
      elsif( defined $cur_adr )
      {
         my $instrs = int $desc[$ctb_idx];
         my $cycles = undef;
         my $events = $instrs;
         if( defined $ccl_idx ){ $cycles = int $desc[$ccl_idx];$events += $cycles; }
         if( $desc[$fnc_idx] ne '' && $desc[$mdl_idx] !~ /(^|\/)lib(mpi|iomp|tbb_preview).*\.so(\.\d+)*$/io ) # better filter by progress/spin functions
         {
            my $fidx = add_events_to_func $desc[$mdl_idx], $desc[$fnc_idx], $instrs, $cycles, $events;
            push @info, { function=>$fidx, inner=>TRUE, blocks=>[], instrs=>$instrs, cycles=>$cycles, events=>$events, module=>$desc[$mdl_idx] };
            $idx = $#info;
         }
         elsif( defined $idx && defined $tid_idx && $desc[$tid_idx] ne '' )
         {
            if( $cur_adr =~ /Unknown/io && int $desc[$tid_idx] != 0 ) # filter by tid
            {
               $funcs_info[$info[$idx]{function}]{instrs} -= $instrs if defined $instrs;
               $funcs_info[$info[$idx]{function}]{cycles} -= $cycles if defined $cycles;
               $funcs_info[$info[$idx]{function}]{events} -= $events if defined $events;
               $info[$idx]{instrs} -= $instrs if defined $instrs;
               $info[$idx]{cycles} -= $cycles if defined $cycles;
               $info[$idx]{events} -= $events if defined $events;
            }
            else
            {
               add_blk_to_cur_func( ( $cur_adr =~ /Unknown/io ? 0 : hex $cur_adr ), ( defined $sze_idx && $desc[$sze_idx] ne '' ? int $desc[$sze_idx] : 1 ), $instrs, $cycles, $events, int $desc[$tid_idx] );
            }
         }
         elsif( defined $idx && defined $cpu_idx && $desc[$cpu_idx] =~ /cpu_(\d+)/io )
         {
            add_blk_to_cur_func( ( $cur_adr =~ /Unknown/io ? 0 : hex $cur_adr ), ( defined $sze_idx && $desc[$sze_idx] ne '' ? int $desc[$sze_idx] : 1 ), $instrs, $cycles, $events, int $1 );
         }
         else{ $cur_adr = undef;$idx = undef; }
      }
   }
   elsif( /^((\[Loop(\@0x[[:xdigit:]]+)?\s+(at\s+line\s*\d+\s*)?in\s+([^\]]+)\s*\])|(\S+))\s+(0x[[:xdigit:]]+\s+)?((\d+(\.\d*)?\s+)+)([^\t\n]+)(\s+([^\t\n]+))?\s*$/io )
   {
      my $func = undef;
      if( defined $6 )
      {
         if( $func_rqrd == FALSE ){ $idx = undef;next; }
         $func = $6;
      }
      else
      {
         if( $loop_rqrd == FALSE || !defined $11 ){ $idx = undef;next; }
         $func = $5;
      }
      my $module = $11;
      my $instrs = 0;
      my $cycles = undef;
      my $events = 0;
      if ( defined $ctb_idx )
      {
         my @desc = split /\t/o;
         $instrs  = int $desc[$ctb_idx];
         $events  = $instrs;
         if( defined $ccl_idx )
         {
            $cycles  = int $desc[$ccl_idx];
            $events += $cycles;
         }
      }
      else
      {
         my @events = split /\s/o, $8;
         for my $s (@events){ $events += int $s if $s !~ /\.\d*$/io; }
         $instrs = $events;
      }
      my $fidx = add_events_to_func $module, $func, $instrs, $cycles, $events;
      push @info, { function=>$fidx, inner=>TRUE, blocks=>[], instrs=>$instrs, cycles=>$cycles, events=>$events, module=>$module };
      $idx = $#info;
   }
   elsif( defined $adr_idx )
   {
      my @desc   = split /\t/o;
      my $instrs = int $desc[$ctb_idx];
      my $cycles = undef;
      my $events = $instrs;
      if( defined $ccl_idx ){ $cycles = int $desc[$ccl_idx];$events += $cycles; }
      if( $adr_idx == 0 )
      {
         my $fidx = add_events_to_func $desc[$mdl_idx], $desc[$fnc_idx], $instrs, $cycles, $events;
         push @info, { function=>$fidx, inner=>TRUE, blocks=>[], instrs=>$instrs, cycles=>$cycles, events=>$events, module=>$desc[$mdl_idx] };
         $idx = $#info;
      }
      add_blk_to_cur_func hex $desc[$adr_idx], ( defined $sze_idx ? int $desc[$sze_idx] : 1 ), $instrs, $cycles, $events, 0 if defined $idx;
   }
   elsif( /^\s+0x([[:xdigit:]]+)\s+(\d+)\s+((\d+(\.\d*)?\s+)+)([^\t]+)\s+([^\t]+)\s*$/io )
   {
      if( defined $idx )
      {
         my $addr   = hex $1;
         my $size   = $2;
         my @events = split /\s/o, $3;
         my $events = 0;for my $s (@events){ $events += int $s if $s !~ /\.\d*$/io; }
         add_blk_to_cur_func $addr, $size, $events, 0, $events, 0;
      }
   }
   elsif( /^(\d+)\s+(\S+)\s+(\S+)$/o              ){ print $_; }
   elsif( /^(\d+)\s+(\S+)\s+(\S+)(\s+\d+:\d+)+$/o ){ print $_; }
   else 
   {
      chomp;  
      if   ( /^(\S+)\s+(\S+):(\S*\D\S*)$/o                             ){ print "-1 $1 $2 $3\n"; }
      elsif( /^(\S+)\s+(\S+)\s+(([^:]+:+\d+)|-a|(0x)?[[:xdigit:]]+)$/o ){ print "-1 $1 $2 $3\n"; }
      elsif( /^(\S+)\s+([^:]+:+\d+)$/o                                 ){ print "-1 $1 $2 $2\n"; }
      elsif( /^([^\s]+)\s+([^:\s]+)$/o                                 ){ print "-1 $1 $2 $2\n"; }
   }
}
@info = grep { $_->{inner} == TRUE } @info;
if( defined $cpu_idx || defined $tid_idx )
{
   @info = grep { @{$_->{blocks}} } @info;
   my %groups  = ();
   my %glb_set = ();
   my $global  = 0;
   my $last    = $#info;
   for my $i (0..$last)
   {
      my $cycles  = 0;
      my %tid_set = ();
      for(@{$info[$i]{blocks}})
      {
         if( exists $tid_set{$_->{tid}} ){ $tid_set{$_->{tid}} += $_->{cycles}; }
         else                            { $tid_set{$_->{tid}}  = $_->{cycles}; }
         $cycles += $_->{cycles};
         if( exists $glb_set{$_->{tid}} ){ $glb_set{$_->{tid}} += $_->{cycles}; }
         else                            { $glb_set{$_->{tid}}  = $_->{cycles}; }
         $global += $_->{cycles};
      }
      my $tids = join ',', (sort{$a<=>$b} keys %tid_set);
      #printf "%x %d %d %s\n", $info[$i]{blocks}[0]{address},(scalar keys %tid_set),$info[$i]{blocks}[0]{cycles},$funcs_info[$info[$i]{function}]{name};
      if( exists $groups{$tids} ){ $groups{$tids}{weight} += $cycles; }
      else                       { $groups{$tids} = { tids=>$tids, tid_cnt=>(scalar keys %tid_set), weight=>$cycles }; }
   }
   for(values %groups){ $_->{weight} /= $_->{tid_cnt}; }

   my $glbs    = join ',', (sort{$a<=>$b} keys %glb_set);
   my $glb_cnt = scalar keys %glb_set;
   printf "%.1f %d: %s\n\n", $global/$glb_cnt, $glb_cnt, $glbs;

   my @list = ();
   my @grps = sort { $b->{weight} <=> $a->{weight} } values %groups;
   for( my $q = 2; $q <= 4; $q++ )
   {
      my $max_dist = 0.0;
      my $max_grp  = undef;
      for my $g (@grps)
      {
         printf "%.1f %d: %s\n", $g->{weight}, $g->{tid_cnt}, $g->{tids};
         if( !@list )
         {
            push @list, { tid_set=>{}, weight=>$g->{weight} };
            for(split ',', $g->{tids}){ $list[-1]{tid_set}{int $_} = TRUE; }
            print "^- selected\n";
         }
         else
         {
            my @tids    = map int, (split ',', $g->{tids});
            my $tid_cnt = scalar @tids;
            my $uniq    = 0.0;
            for my $t (@tids)
            {
               my $w  = $g->{weight};
               for(@list){ if( exists $_->{tid_set}{$t} ){ $w = 0.0;last; } }
               $uniq += $w;
            }
            my $dupl    = undef;
            my $max_cmn = int(0.5*($tid_cnt-0.5)); # at least 50% of g should belong to l
            for my $l (@list)
            {
               my $cmn = 0;for(@tids){ $cmn++ if exists $l->{tid_set}{$_}; }
               if( $cmn > int(0.5*$tid_cnt) && $cmn > int(0.5*(scalar keys %{$l->{tid_set}})) ){ $uniq=0.0;$dupl=undef;last; } # more 50% of l and g is common
               if( $cmn > $max_cmn )
               {
                  $max_cmn = $cmn;
                  $dupl    = $cmn*$g->{weight};#((scalar keys %{$l->{tid_set}})-$cmn)*$g->{weight};
               }
            }
            my $dist = !defined $dupl || $uniq >= $dupl ? $uniq : $dupl;
            $dupl=0 if !defined $dupl;print "($uniq:$dupl)\n";
            if( $dist > $max_dist )
            {
               $max_dist = $dist;
               $max_grp  = $g;
               print "^- selected\n";
            }
         }
      }
      if( defined $max_grp )
      {
         push @list, { tid_set=>{}, weight=>$max_grp->{weight} };
         for(map int, (split ',', $max_grp->{tids})){ $list[-1]{tid_set}{$_} = TRUE; }
      }
      else{ last; }
      for(@list){ printf "SET: %.1f %d: %s\n", $_->{weight}, (scalar keys %{$_->{tid_set}}), (join ',', (sort{$a<=>$b} keys %{$_->{tid_set}})); }
   }
   my $entry_rqrd = TRUE;
   for my $t (keys %glb_set)
   {
      my $tid_sptd = FALSE; 
      for(@list){ if( exists $_->{tid_set}{$t} ){ $tid_sptd=TRUE;last; } }
      if( $tid_sptd == FALSE )
      {
         if( $entry_rqrd == TRUE ){ $entry_rqrd=FALSE;push @list, { tid_set=>{}, weight=>$global/$glb_cnt } };
         $list[-1]{tid_set}{$t} = TRUE;
      }
   }
   for my $i (0..$#list)
   {
      my $cnt  = scalar keys %{$list[$i]{tid_set}};
      my $name = "$cnt";
      for my $j (0..$#list){ if( $j != $i && scalar keys %{$list[$j]{tid_set}} == $cnt ){ $name = "$cnt"."s$i";last; } }
      $list[$i]{name} = $name;
   }
   print "\n";
   for(@list){ printf "SET: %8.1f  /%-5s := { %s }\n", $_->{weight}, $_->{name}, (join ',', (sort{$a<=>$b} keys %{$_->{tid_set}})); }
   print "\n";


   for my $i (0..$last)
   {
      my $tid_cnt = 0;
      my %tid_set = ();
      for(@{$info[$i]{blocks}}){ if( !exists $tid_set{$_->{tid}} ){ $tid_set{$_->{tid}}=TRUE;$tid_cnt++; } }
      #$tid_cnt = 64;for( my $t = 0; $t < $tid_cnt; $t++ ){ $tid_set{$t} = TRUE; }
      #if( $tid_cnt > 1 )
      #{
      #my $tids = join ',', (sort{$a<=>$b} keys %tid_set);
      my $max_cmn = 0;
      my $bst_set = 0;
      for my $s (0..$#list)
      {
         my $cmn = 0;for(keys %tid_set){ $cmn++ if exists $list[$s]{tid_set}{$_}; }
         #print $funcs_info[$info[$i]{function}]{name},' ',scalar(keys %tid_set),' ',$cmn,' ',scalar(keys %{$list[$s]{tid_set}}),' ',scalar(keys %{$list[$bst_set]{tid_set}}),"\n";
         if( $cmn > $max_cmn || ( $cmn == $max_cmn && scalar( keys %{$list[$s]{tid_set}} ) < scalar( keys %{$list[$bst_set]{tid_set}} ) ) )
         {
             $max_cmn = $cmn;
             $bst_set = $s;
         }
      }
      #printf "%x %d %s %d\n",$info[$i]{blocks}[0]{address},$tid_cnt,$list[$bst_set]{name},$max_cmn;
      my $fidx = $info[$i]{function};
      my $pidx = add_events_to_func $info[$i]{module}, $funcs_info[$fidx]{name}."/$list[$bst_set]{name}", $info[$i]{instrs}, $info[$i]{cycles}, $info[$i]{events};
      $info[$i]{function} = $pidx;
      if( exists $funcs_info[$pidx]{groups}{$bst_set} ){ push @{$funcs_info[$pidx]{groups}{$bst_set}}, $i; }
      else{ $funcs_info[$pidx]{groups}{$bst_set} = [ $i ]; }
      $funcs_info[$fidx]{instrs} -= $info[$i]{instrs} if defined $info[$i]{instrs};
      $funcs_info[$fidx]{cycles} -= $info[$i]{cycles} if defined $info[$i]{cycles};
      $funcs_info[$fidx]{events} -= $info[$i]{events} if defined $info[$i]{events};
      #}
   }
   for my $f (@funcs_info){if( $f->{groups} )
   {
      while( my ($set,$idxs) = each %{$f->{groups}} )
      {
         my @stats = ();
         for my $t (keys %{$list[$set]{tid_set}})
         {
            push @stats, { tid=>$t, instrs=>0, cycles=>0, events=>0 };
            for my $i (@{$idxs}){for(@{$info[$i]{blocks}}){if( $_->{tid} == $t )
            {
               $stats[-1]{instrs} += $_->{instrs} if defined $_->{instrs};
               $stats[-1]{cycles} += $_->{cycles} if defined $_->{cycles};
               $stats[-1]{events} += $_->{events} if defined $_->{events};
            }}}
         }
         # critical path
         my $slw_idx    = 0;
         my $fst_idx    = undef;
         my $key        = 'cycles';
         for my $j (0..$#stats)
         {
            $slw_idx = $j if $stats[$j]{$key} > $stats[$slw_idx]{$key};
            $fst_idx = $j if $stats[$j]{$key} > 0 && ( !defined $fst_idx || $stats[$j]{$key} < $stats[$fst_idx]{$key} );
         }
         #print "$f->{name}: $stats[$slw_idx]{tid}";
         #printf " %.3f",($stats[$slw_idx]{$key}/$stats[$fst_idx]{$key}) if defined $fst_idx;
         #print "\n";
         $f->{slw_tid} = $stats[$slw_idx]{tid};
         $f->{slw_fst} = ($stats[$slw_idx]{$key}/$stats[$fst_idx]{$key}) if defined $fst_idx;

         my $adj_instrs = 0;
         my $adj_cycles = 0;
         my $adj_events = 0;
         for my $i (@{$idxs})
         {
            my $instrs = 0;
            my $cycles = 0;
            my $events = 0;
            my $cnt    = scalar @{$info[$i]{blocks}};
            for( my $j = $cnt-1; $j >= 0; $j-- )
            {
               if( $info[$i]{blocks}[$j]{tid} == $stats[$slw_idx]{tid} )
               {
                  $instrs += $info[$i]{blocks}[$j]{instrs} if defined $info[$i]{blocks}[$j]{instrs};
                  $cycles += $info[$i]{blocks}[$j]{cycles} if defined $info[$i]{blocks}[$j]{cycles};
                  $events += $info[$i]{blocks}[$j]{events} if defined $info[$i]{blocks}[$j]{events};
               }
               else{ splice @{$info[$i]{blocks}}, $j, 1; }
            }
            $adj_instrs += $info[$i]{instrs} - $instrs if defined $info[$i]{instrs};
            $adj_cycles += $info[$i]{cycles} - $cycles if defined $info[$i]{cycles};
            $adj_events += $info[$i]{events} - $events if defined $info[$i]{events};
            $info[$i]{instrs} = $instrs if defined $info[$i]{instrs};
            $info[$i]{cycles} = $cycles if defined $info[$i]{cycles};
            $info[$i]{events} = $events if defined $info[$i]{events};
         }

         $f->{instrs} -= $adj_instrs if defined $f->{instrs};
         $f->{cycles} -= $adj_cycles if defined $f->{cycles};
         $f->{events} -= $adj_events if defined $f->{events};
         #print "$f->{cycles}\n";
      }
   }}
   @info = grep { @{$_->{blocks}} } @info;
}
if( $dump_rqrd == FALSE )
{
   if( $csv_rqrd == TRUE )
   {
      print "function";
      print ",instrs" if defined $ctb_rte;
      print ",cycles" if defined $ccl_rte;
      print ",cpi"    if defined $ctb_rte && defined $ccl_rte;
      print ",slowest,max/min\n";
      for( sort {   defined $a->{cycles} && defined $b->{cycles} ? $b->{cycles} <=> $a->{cycles} : 
                  ( defined $a->{instrs} && defined $b->{instrs} ? $b->{instrs} <=> $a->{instrs} : $b->{events} <=> $a->{events} ) } @funcs_info )
      {
         next if ( defined $_->{instrs} && $_->{instrs} == 0 ) || ( defined $_->{cycles} && $_->{cycles} == 0 );
         print  "$_->{name}";
         printf ",%.0f",$_->{instrs}*$ctb_rte if defined $_->{instrs};
         printf ",%.0f",$_->{cycles}*$ccl_rte if defined $_->{cycles};
         printf ",%.5f",($_->{cycles}/$_->{instrs})*($ccl_rte/$ctb_rte) if defined $_->{instrs} && defined $_->{cycles} && $_->{instrs} > 0;
         print  ",",(exists $_->{slw_tid} ? $_->{slw_tid} : 'N/A' );
         printf ",%.2f",(exists $_->{slw_fst} ? $_->{slw_fst} : 1.0 );
         print  "\n";
      }
   }
   else
   {
      for my $h ( sort { ( exists $b->{function} ? sqrt($b->{$key_event})*sqrt($funcs_info[$b->{function}]{$key_event}) : $b->{$key_event} )
                     <=> ( exists $a->{function} ? sqrt($a->{$key_event})*sqrt($funcs_info[$a->{function}]{$key_event}) : $a->{$key_event} ) } @info )
      {
         next if scalar @{$h->{blocks}} == 0;
         print "$h->{$key_event} $h->{module}";
         my $need_base = ( !grep { $_->{base} == $h->{base} || exists $_->{step} && $h->{base} >= $_->{base} && ( $h->{base} & ($_->{step}-1) ) == 0 } @DEFAULT_BASES ) ? TRUE : FALSE;
         print ":$h->{base}" if $need_base == TRUE;
         print " $funcs_info[$h->{function}]{name}";
         for( sort { $b->{$key_event} <=> $a->{$key_event} } @{$h->{blocks}} ){ print ' '.( ( $need_base == FALSE ? $h->{base} : 0 ) + $_->{address} ).":$_->{size}"; }
         print "\n";
      }
   }
}
elsif( $dump_sptd == FALSE )
{
   # hack, need module base addr info from amplifier
   my %fake_bases = ();
   my $lst_base   = 0x400000000000;
   my $step       = 0x001000000000;
   for(@info)
   {
      if( $_->{base} == 0 && $_->{module} =~ /\.so(\.\d+)*$/o )
      {
         my $base = 0;
         if( exists $fake_bases{$_->{module}} ){ $base = $fake_bases{$_->{module}}; }
         else
         {
            $base      = $lst_base;
            $lst_base += $step;
            $fake_bases{$_->{module}} = $base;
         }
         $_->{base} = $base;
      }
   }

   print '#';
   print $ctb_rte if defined $ctb_rte;
   print " $ccl_rte" if defined $ccl_rte;
   print "\n";
   my $mdl = undef;
   for my $h ( sort { $a->{base} == $b->{base} ? ( $a->{module} eq $b->{module} ? $a->{blocks}[0]{address} <=> $b->{blocks}[0]{address} : $a->{module} cmp $b->{module} )
                    : $a->{base} <=> $b->{base} } @info )
   {
      my $cur_mdl = undef;
      for( sort { $a->{address} <=> $b->{address} } @{$h->{blocks}} )
      {
         next if $events_flt > 0 && defined $ctb_rte && defined $ccl_rte && ( $_->{instrs} <= $events_flt || $_->{cycles} <= $events_flt ); # need to "combine" BB with insignificant amount of instrs/cycles, otherwise CPI error will be huge
         if( ( !defined $ctb_rte || $_->{instrs} > 0 ) && ( !defined $ccl_rte || $_->{cycles} > 0 ) )
         {
            if( !defined $cur_mdl )
            {
               $cur_mdl = $h->{module};
               if( !defined $mdl || $cur_mdl ne $mdl )
               {
                  $mdl = $cur_mdl;
                  printf "#%s:%x\n", $mdl, $h->{base};
               }
            }
            printf '%x %d', $_->{address}, $_->{size};
            print " $_->{instrs}" if defined $ctb_rte;
            print " $_->{cycles}" if defined $ccl_rte;
            print "\n";
         }
      }
   }
}
